xenoprofile: Add IBS support
authorKeir Fraser <keir.fraser@citrix.com>
Mon, 2 Aug 2010 10:00:56 +0000 (11:00 +0100)
committerKeir Fraser <keir.fraser@citrix.com>
Mon, 2 Aug 2010 10:00:56 +0000 (11:00 +0100)
Add IBS (Instruction-Based Sampling) support for AMD family 10h
processors. Most of the implementation is derived from the latest Linux
kernel. Two hypercalls are added, which are necessary for IBS feature
detection and for reading the user-mode parameters.

Signed-off-by: Wei Wang <wei.wang2@amd.com>
xen/arch/x86/oprofile/nmi_int.c
xen/arch/x86/oprofile/op_counter.h
xen/arch/x86/oprofile/op_model_athlon.c
xen/arch/x86/oprofile/xenoprof.c
xen/common/xenoprof.c
xen/include/asm-x86/xenoprof.h
xen/include/public/xenoprof.h

index e8d8cee6f9c950ee412686567f3788ce1e858753..853528c99549452bf8c35d74cc25b180a3a51bbd 100644 (file)
@@ -28,6 +28,7 @@
 #include "op_x86_model.h"
  
 struct op_counter_config counter_config[OP_MAX_COUNTER];
+struct op_ibs_config ibs_config;
 
 static struct op_x86_model_spec const *__read_mostly model;
 static struct op_msrs cpu_msrs[NR_CPUS];
@@ -430,6 +431,7 @@ static int __init nmi_init(void)
                        case 0x10:
                                model = &op_athlon_spec;
                                cpu_type = "x86-64/family10";
+                               ibs_caps = ibs_init();
                                break;
                        case 0x11:
                                model = &op_athlon_spec;
index 2880b15c46752e0d14e35a825ebe30d7a6c2f7d3..b515ac9ebc8e379a36917e6f270653d412a87dd7 100644 (file)
@@ -26,4 +26,16 @@ struct op_counter_config {
 
 extern struct op_counter_config counter_config[];
 
+/* AMD IBS configuration */
+struct op_ibs_config {
+    /* Non-zero: IBS op sampling is started, stopped and handled. */
+    unsigned long op_enabled;
+    /* Non-zero: IBS fetch sampling is started, stopped and handled. */
+    unsigned long fetch_enabled;
+    /* Fetch sampling period; shifted right by 4 and masked to 16 bits
+     * before being written to the fetch control MSR. */
+    unsigned long max_cnt_fetch;
+    /* Op sampling period; shifted right by 4 before being programmed. */
+    unsigned long max_cnt_op;
+    /* Non-zero: IBS_FETCH_RAND_EN is set in the fetch control value. */
+    unsigned long rand_en;
+    /* Non-zero: IBS_OP_CNT_CTL is requested when the IBS_CAPS_OPCNT
+     * capability is present. */
+    unsigned long dispatched_ops;
+};
+
+extern struct op_ibs_config ibs_config;
+
 #endif /* OP_COUNTER_H */
index 343d3e65b4eb5204346188ffc2282bffc2a40545..d1158499306ac8ca4ea0a733bb92d69349983f34 100644 (file)
@@ -19,6 +19,7 @@
 #include <asm/regs.h>
 #include <asm/current.h>
 #include <asm/hvm/support.h>
+#include <xen/pci_regs.h>
  
 #include "op_x86_model.h"
 #include "op_counter.h"
@@ -47,6 +48,116 @@ static unsigned long reset_value[NUM_COUNTERS];
 
 extern char svm_stgi_label[];
 
+/* IBS capability bits; zero makes every IBS start/stop/handle path a no-op. */
+u32 ibs_caps = 0;
+/* IbsOpCtl base value computed when op sampling starts; the NMI path
+ * re-randomizes it to re-arm op sampling after each sample. */
+static u64 ibs_op_ctl;
+
+/* IBS cpuid feature detection */
+#define IBS_CPUID_FEATURES              0x8000001b
+
+/* IBS MSRs */
+#define MSR_AMD64_IBSFETCHCTL           0xc0011030
+#define MSR_AMD64_IBSFETCHLINAD         0xc0011031
+#define MSR_AMD64_IBSFETCHPHYSAD        0xc0011032
+#define MSR_AMD64_IBSOPCTL              0xc0011033
+#define MSR_AMD64_IBSOPRIP              0xc0011034
+#define MSR_AMD64_IBSOPDATA             0xc0011035
+#define MSR_AMD64_IBSOPDATA2            0xc0011036
+#define MSR_AMD64_IBSOPDATA3            0xc0011037
+#define MSR_AMD64_IBSDCLINAD            0xc0011038
+#define MSR_AMD64_IBSDCPHYSAD           0xc0011039
+#define MSR_AMD64_IBSCTL                0xc001103a
+
+/*
+ * Same bit mask as for IBS cpuid feature flags (Fn8000_001B_EAX), but
+ * bit 0 is used to indicate the existence of IBS.
+ */
+#define IBS_CAPS_AVAIL                  (1LL<<0)
+#define IBS_CAPS_RDWROPCNT              (1LL<<3)
+#define IBS_CAPS_OPCNT                  (1LL<<4)
+
+/* IBS randomization macros */
+#define IBS_RANDOM_BITS                 12
+#define IBS_RANDOM_MASK                 ((1ULL << IBS_RANDOM_BITS) - 1)
+#define IBS_RANDOM_MAXCNT_OFFSET        (1ULL << (IBS_RANDOM_BITS - 5))
+
+/* IbsFetchCtl bits/masks */
+#define IBS_FETCH_RAND_EN               (1ULL<<57)
+#define IBS_FETCH_VAL                   (1ULL<<49)
+#define IBS_FETCH_ENABLE                (1ULL<<48)
+#define IBS_FETCH_CNT                   0xFFFF0000ULL
+#define IBS_FETCH_MAX_CNT               0x0000FFFFULL
+
+/* IbsOpCtl bits */
+#define IBS_OP_CNT_CTL                  (1ULL<<19)
+#define IBS_OP_VAL                      (1ULL<<18)
+#define IBS_OP_ENABLE                   (1ULL<<17)
+#define IBS_OP_MAX_CNT                  0x0000FFFFULL
+
+/* IBS sample identifier */
+#define IBS_FETCH_CODE                  13
+#define IBS_OP_CODE                     14
+
+/*
+ * clamp(val, min, max): evaluate each argument once and yield val limited
+ * to the range [min, max]. The two pointer comparisons produce no code;
+ * they exist so the compiler complains when the argument types differ.
+ */
+#define clamp(val, min, max) ({                        \
+       typeof(val) __val = (val);              \
+       typeof(min) __min = (min);              \
+       typeof(max) __max = (max);              \
+       (void) (&__val == &__min);              \
+       (void) (&__val == &__max);              \
+       __val = __val < __min ? __min: __val;   \
+       __val > __max ? __max: __val; })
+
+/*
+ * 16-bit Linear Feedback Shift Register (LFSR)
+ */
+/*
+ * Advance the generator by one step and return the new register value.
+ * The state lives in a function-local static seeded with 0xF00D, so all
+ * callers share one sequence.
+ */
+static unsigned int lfsr_random(void)
+{
+    static unsigned int lfsr_value = 0xF00D;
+    unsigned int bit;
+
+    /* Compute next bit to shift in: XOR of taps at bits 0, 2, 3 and 5 */
+    bit = ((lfsr_value >> 0) ^
+           (lfsr_value >> 2) ^
+           (lfsr_value >> 3) ^
+           (lfsr_value >> 5)) & 0x0001;
+
+    /* Advance to next register value; the new bit enters at bit 15 */
+    lfsr_value = (lfsr_value >> 1) | (bit << 15);
+
+    return lfsr_value;
+}
+
+/*
+ * IBS software randomization
+ *
+ * The IBS periodic op counter is randomized in software. The lower 12
+ * bits of the 20 bit counter are randomized. IbsOpCurCnt is
+ * initialized with a 12 bit random value.
+ */
+/*
+ * Return @val (an IbsOpCtl value) with a pseudo-random perturbation
+ * applied. Which perturbation is used depends on IBS_CAPS_RDWROPCNT in
+ * ibs_caps; @val itself is not modified for the caller.
+ */
+static inline u64 op_amd_randomize_ibs_op(u64 val)
+{
+    unsigned int random = lfsr_random();
+
+    if (!(ibs_caps & IBS_CAPS_RDWROPCNT))
+        /*
+         * Work around if the hw can not write to IbsOpCurCnt
+         *
+         * Randomize the lower 8 bits of the 16 bit
+         * IbsOpMaxCnt [15:0] value in the range of -128 to
+         * +127 by adding/subtracting an offset to the
+         * maximum count (IbsOpMaxCnt).
+         *
+         * To avoid over or underflows and protect upper bits
+         * starting at bit 16, the initial value for
+         * IbsOpMaxCnt must fit in the range from 0x0081 to
+         * 0xff80.
+         */
+        val += (s8)(random >> 4);
+    else
+        /* Place 12 random bits at bits 32..43 of the control value. */
+        val |= (u64)(random & IBS_RANDOM_MASK) << 32;
+
+    return val;
+}
+
 static void athlon_fill_in_addresses(struct op_msrs * const msrs)
 {
        msrs->counters[0].addr = MSR_K7_PERFCTR0;
@@ -101,6 +212,78 @@ static void athlon_setup_ctrs(struct op_msrs const * const msrs)
        }
 }
 
+/*
+ * Log a 64-bit IBS value for the current vcpu as two consecutive
+ * xenoprof_log_event() records: the low 32 bits first, then the high
+ * 32 bits. @regs and @mode are passed through unchanged.
+ */
+static inline void
+ibs_log_event(u64 data, struct cpu_user_regs * const regs, int mode)
+{
+       struct vcpu *v = current;
+       u32 temp = 0;
+
+       /* low half */
+       temp = data & 0xFFFFFFFF;
+       xenoprof_log_event(v, regs, temp, mode, 0);
+       
+       /* high half */
+       temp = (data >> 32) & 0xFFFFFFFF;
+       xenoprof_log_event(v, regs, temp, mode, 0);
+       
+}
+
+/*
+ * Collect pending IBS fetch and op samples and re-arm the hardware.
+ *
+ * For each enabled sample type the control MSR is read; if its "valid"
+ * bit is set, a marker code (IBS_FETCH_CODE or IBS_OP_CODE) is logged,
+ * followed by the contents of the associated IBS data MSRs, and the
+ * control MSR is rewritten so sampling continues.
+ *
+ * Returns 1 on every path, including when IBS is unavailable or no
+ * sample was pending.
+ */
+static inline int handle_ibs(int mode, struct cpu_user_regs * const regs)
+{
+       u64 val, ctl;
+       struct vcpu *v = current;
+
+       if (!ibs_caps)
+               return 1;
+
+       if (ibs_config.fetch_enabled) {
+               rdmsrl(MSR_AMD64_IBSFETCHCTL, ctl);
+               if (ctl & IBS_FETCH_VAL) {
+                       rdmsrl(MSR_AMD64_IBSFETCHLINAD, val);
+                       /* marker record, then the linear address as-is */
+                       xenoprof_log_event(v, regs, IBS_FETCH_CODE, mode, 0);
+                       xenoprof_log_event(v, regs, val, mode, 0);
+
+                       /* linear address and control value as 32-bit halves */
+                       ibs_log_event(val, regs, mode);
+                       ibs_log_event(ctl, regs, mode);
+
+                       rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, val);
+                       ibs_log_event(val, regs, mode);
+               
+                       /* reenable the IRQ */
+                       /* clear the valid bit and current count, keep the rest */
+                       ctl &= ~(IBS_FETCH_VAL | IBS_FETCH_CNT);
+                       ctl |= IBS_FETCH_ENABLE;
+                       wrmsrl(MSR_AMD64_IBSFETCHCTL, ctl);
+               }
+       }
+
+       if (ibs_config.op_enabled) {
+               rdmsrl(MSR_AMD64_IBSOPCTL, ctl);
+               if (ctl & IBS_OP_VAL) {
+
+                       rdmsrl(MSR_AMD64_IBSOPRIP, val);
+                       /* marker record, then the op RIP as-is */
+                       xenoprof_log_event(v, regs, IBS_OP_CODE, mode, 0);
+                       xenoprof_log_event(v, regs, val, mode, 0);
+                       
+                       ibs_log_event(val, regs, mode);
+
+                       /* remaining op data MSRs, each as two 32-bit halves */
+                       rdmsrl(MSR_AMD64_IBSOPDATA, val);
+                       ibs_log_event(val, regs, mode);
+                       rdmsrl(MSR_AMD64_IBSOPDATA2, val);
+                       ibs_log_event(val, regs, mode);
+                       rdmsrl(MSR_AMD64_IBSOPDATA3, val);
+                       ibs_log_event(val, regs, mode);
+                       rdmsrl(MSR_AMD64_IBSDCLINAD, val);
+                       ibs_log_event(val, regs, mode);
+                       rdmsrl(MSR_AMD64_IBSDCPHYSAD, val);
+                       ibs_log_event(val, regs, mode);
+
+                       /* reenable the IRQ */
+                       /* the value read above is discarded; the saved base
+                        * control value is re-randomized and written back */
+                       ctl = op_amd_randomize_ibs_op(ibs_op_ctl);
+                       wrmsrl(MSR_AMD64_IBSOPCTL, ctl);
+               }
+       }
+
+    return 1;
+}
+
 static int athlon_check_ctrs(unsigned int const cpu,
                             struct op_msrs const * const msrs,
                             struct cpu_user_regs * const regs)
@@ -134,10 +317,51 @@ static int athlon_check_ctrs(unsigned int const cpu,
                }
        }
 
+       ovf = handle_ibs(mode, regs);
        /* See op_model_ppro.c */
        return ovf;
 }
 
+/*
+ * Program the IBS fetch and/or op control MSRs from ibs_config and
+ * enable sampling. Does nothing when ibs_caps is zero. The op control
+ * base value is also saved in ibs_op_ctl.
+ */
+static inline void start_ibs(void)
+{
+       u64 val = 0;
+
+       if (!ibs_caps)
+               return;
+
+       if (ibs_config.fetch_enabled) {
+               /* the max count field takes the period divided by 16 */
+               val = (ibs_config.max_cnt_fetch >> 4) & IBS_FETCH_MAX_CNT;
+               val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0;
+               val |= IBS_FETCH_ENABLE;
+               wrmsrl(MSR_AMD64_IBSFETCHCTL, val);
+       }
+
+       if (ibs_config.op_enabled) {
+               ibs_op_ctl = ibs_config.max_cnt_op >> 4;
+               if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) {
+                       /*
+                        * IbsOpCurCnt not supported.  See
+                        * op_amd_randomize_ibs_op() for details.
+                        */
+                       ibs_op_ctl = clamp((unsigned long long)ibs_op_ctl, 
+                                                       0x0081ULL, 0xFF80ULL);
+               } else {
+                       /*
+                        * The start value is randomized with a
+                        * positive offset, we need to compensate it
+                        * with the half of the randomized range. Also
+                        * avoid underflows.
+                        *
+                        * NOTE(review): the min() below caps the value
+                        * at IBS_OP_MAX_CNT, i.e. it guards the upper
+                        * bound of the 16-bit max count field.
+                        */
+               ibs_op_ctl = min(ibs_op_ctl + IBS_RANDOM_MAXCNT_OFFSET,
+                                       IBS_OP_MAX_CNT);
+               }
+               if (ibs_caps & IBS_CAPS_OPCNT && ibs_config.dispatched_ops)
+                       ibs_op_ctl |= IBS_OP_CNT_CTL;
+               ibs_op_ctl |= IBS_OP_ENABLE;
+               /* the saved base value stays unrandomized; only the value
+                * written to the MSR is perturbed */
+               val = op_amd_randomize_ibs_op(ibs_op_ctl);
+               wrmsrl(MSR_AMD64_IBSOPCTL, val);
+       }
+}
  
 static void athlon_start(struct op_msrs const * const msrs)
 {
@@ -150,8 +374,22 @@ static void athlon_start(struct op_msrs const * const msrs)
                        CTRL_WRITE(msr_content, msrs, i);
                }
        }
+       start_ibs();
 }
 
+/*
+ * Disable IBS sampling by writing zero to the control MSR of each
+ * sample type enabled in ibs_config. Does nothing when ibs_caps is zero.
+ */
+static void stop_ibs(void)
+{
+       if (!ibs_caps)
+               return;
+
+       if (ibs_config.fetch_enabled)
+               /* clear max count and enable */
+               wrmsrl(MSR_AMD64_IBSFETCHCTL, 0);
+
+       if (ibs_config.op_enabled)
+               /* clear max count and enable */
+               wrmsrl(MSR_AMD64_IBSOPCTL, 0);
+}
 
 static void athlon_stop(struct op_msrs const * const msrs)
 {
@@ -165,8 +403,114 @@ static void athlon_stop(struct op_msrs const * const msrs)
                CTRL_SET_INACTIVE(msr_content);
                CTRL_WRITE(msr_content, msrs, i);
        }
+
+       stop_ibs();
+}
+
+/* Flag written to the IBSCTL PCI register together with the LVT offset. */
+#define IBSCTL_LVTOFFSETVAL             (1 << 8)
+/* Value (shifted left by 8) written to the extended LVT entry for IBS. */
+#define APIC_EILVT_MSG_NMI              0x4
+/* Extended LVT offset used for IBS. */
+#define APIC_EILVT_LVTOFF_IBS           1
+/* APIC register offset of extended LVT entry n; n is parenthesised so
+ * that expression arguments such as (a + b) expand correctly. */
+#define APIC_EILVTn(n)                  (0x500 + 0x10 * (n))
+/*
+ * Per-CPU callback: write APIC_EILVT_MSG_NMI << 8 to the APIC register
+ * at APIC_EILVTn(0) + (APIC_EILVT_LVTOFF_IBS << 4). @arg is unused.
+ */
+static inline void init_ibs_nmi_per_cpu(void *arg)
+{
+       unsigned long reg;
+
+       reg = (APIC_EILVT_LVTOFF_IBS << 4) + APIC_EILVTn(0);
+       apic_write(reg, APIC_EILVT_MSG_NMI << 8);
+}
+
+#define PCI_VENDOR_ID_AMD               0x1022
+#define PCI_DEVICE_ID_AMD_10H_NB_MISC   0x1203
+#define IBSCTL                          0x1cc
+/*
+ * Route IBS interrupts as NMIs.
+ *
+ * Runs init_ibs_nmi_per_cpu() on every CPU, then scans every PCI
+ * bus/device/function for AMD family 10h northbridge "misc" devices and
+ * writes the IBS LVT offset (with its valid flag) to each one's IBSCTL
+ * register, reading it back to verify.
+ *
+ * Returns 0 on success, 1 if a read-back mismatches or if no matching
+ * device was found.
+ */
+static int init_ibs_nmi(void)
+{
+       int bus, dev, func;
+       u32 id, value;
+       u16 vendor_id, dev_id;
+       int nodes;
+
+       /* per CPU setup */
+       on_each_cpu(init_ibs_nmi_per_cpu, NULL, 1);
+
+       nodes = 0;
+       for (bus = 0; bus < 256; bus++) {
+               for (dev = 0; dev < 32; dev++) {
+                       for (func = 0; func < 8; func++) {
+                               id = pci_conf_read32(bus, dev, func, PCI_VENDOR_ID);
+
+                               /* skip slots that report an invalid vendor/device ID */
+                               if ((id == 0xffffffff) || (id == 0x00000000) ||
+                                       (id == 0x0000ffff) || (id == 0xffff0000))
+                                       continue;
+
+                               vendor_id = id & 0xffff;
+                               dev_id = (id >> 16) & 0xffff;
+
+                               if ((vendor_id == PCI_VENDOR_ID_AMD) &&
+                                       (dev_id == PCI_DEVICE_ID_AMD_10H_NB_MISC)) {
+
+                                       pci_conf_write32(bus, dev, func, IBSCTL,
+                                               IBSCTL_LVTOFFSETVAL | APIC_EILVT_LVTOFF_IBS);
+
+                                       /* read back to confirm the write took effect */
+                                       value = pci_conf_read32(bus, dev, func, IBSCTL);
+
+                                       if (value != (IBSCTL_LVTOFFSETVAL |
+                                               APIC_EILVT_LVTOFF_IBS)) {
+                                               printk("Xenoprofile: Failed to setup IBS LVT offset, "
+                                                       "IBSCTL = 0x%08x\n", value);
+                                               return 1;
+                                       }
+                                       nodes++;
+                               }
+                       }
+               }
+       }
+
+       if (!nodes) {
+               printk("Xenoprofile: No CPU node configured for IBS\n");
+               return 1;
+       }
+
+       return 0;
+}
+
+/*
+ * Query the IBS capability bits of the boot CPU.
+ *
+ * Returns 0 when the CPU does not advertise X86_FEATURE_IBS. Returns
+ * only IBS_CAPS_AVAIL when the IBS feature CPUID leaf is not
+ * implemented or does not have its "available" bit set. Otherwise
+ * returns EAX of CPUID leaf IBS_CPUID_FEATURES.
+ *
+ * The result is kept in a local so that the file-scope ibs_caps is left
+ * untouched; publishing the value is the caller's decision.
+ */
+static u32 get_ibs_caps(void)
+{
+       u32 caps;
+       unsigned int max_level;
+
+       if (!boot_cpu_has(X86_FEATURE_IBS))
+               return 0;
+
+       /* check IBS cpuid feature flags */
+       max_level = cpuid_eax(0x80000000);
+       if (max_level < IBS_CPUID_FEATURES)
+               return IBS_CAPS_AVAIL;
+
+       caps = cpuid_eax(IBS_CPUID_FEATURES);
+       if (!(caps & IBS_CAPS_AVAIL))
+               /* cpuid flags not valid */
+               return IBS_CAPS_AVAIL;
+
+       return caps;
 }
 
+/*
+ * Detect IBS and set up its NMI delivery.
+ *
+ * Returns the detected capability bits on success, or 0 when IBS is
+ * not present or the NMI routing could not be configured. The result
+ * is only returned here; storing it is left to the caller.
+ */
+u32 ibs_init(void)
+{
+       /* named differently from the file-scope ibs_caps to avoid shadowing it */
+       u32 caps = get_ibs_caps();
+
+       if ( !caps )
+               return 0;
+
+       if (init_ibs_nmi())
+               return 0;
+
+       printk("Xenoprofile: AMD IBS detected (0x%08x)\n",
+               (unsigned)caps);
+       return caps;
+}
 
 struct op_x86_model_spec const op_athlon_spec = {
        .num_counters = NUM_COUNTERS,
index da62f991e928895cbd9b90d8ad80eb134837297d..0c1c7ab031b7e689cec3bc0bd9a881e3739a59ef 100644 (file)
@@ -38,6 +38,23 @@ int xenoprof_arch_counter(XEN_GUEST_HANDLE(void) arg)
     return 0;
 }
 
+/*
+ * Copy one struct xenoprof_ibs_counter from the guest handle @arg and
+ * store each of its fields in the global ibs_config.
+ *
+ * Returns 0 on success, -EFAULT if the guest copy fails. The values are
+ * stored as supplied; no range checking is done here.
+ */
+int xenoprof_arch_ibs_counter(XEN_GUEST_HANDLE(void) arg)
+{
+    struct xenoprof_ibs_counter ibs_counter;
+
+    if ( copy_from_guest(&ibs_counter, arg, 1) )
+        return -EFAULT;
+
+    /* uint64_t guest fields are assigned to unsigned long config fields */
+    ibs_config.op_enabled = ibs_counter.op_enabled;
+    ibs_config.fetch_enabled = ibs_counter.fetch_enabled;
+    ibs_config.max_cnt_fetch = ibs_counter.max_cnt_fetch;
+    ibs_config.max_cnt_op = ibs_counter.max_cnt_op;
+    ibs_config.rand_en = ibs_counter.rand_en;
+    ibs_config.dispatched_ops = ibs_counter.dispatched_ops;
+
+    return 0;
+}
+
 #ifdef CONFIG_COMPAT
 int compat_oprof_arch_counter(XEN_GUEST_HANDLE(void) arg)
 {
index 9390d8dc9b96bc1742263c74d85e97386faee488..ab95d92aafe66be8c6c8602710d151b6d6cc20ec 100644 (file)
@@ -49,6 +49,9 @@ static u64 passive_samples;
 static u64 idle_samples;
 static u64 others_samples;
 
+/* AMD IBS support */
+extern u32 ibs_caps;
+
 int acquire_pmu_ownership(int pmu_ownship)
 {
     spin_lock(&pmu_owner_lock);
@@ -881,6 +884,20 @@ int do_xenoprof_op(int op, XEN_GUEST_HANDLE(void) arg)
             ret = -EFAULT;
         break;
 
+    case XENOPROF_ibs_counter:
+        if ( (xenoprof_state != XENOPROF_COUNTERS_RESERVED) ||
+             (adomains == 0) )
+        {
+            ret = -EPERM;
+            break;
+        }
+        ret = xenoprof_arch_ibs_counter(arg);
+        break;
+
+    case XENOPROF_get_ibs_caps:
+        ret = ibs_caps;
+        break;
+
     default:
         ret = -ENOSYS;
     }
index be4e74241dbc3205ec1f1d2a9dd8062f823858b4..8431379d9569b26a2914f01cd7ab3238a336e5a5 100644 (file)
@@ -42,10 +42,15 @@ int xenoprof_arch_init(int *num_events, char *cpu_type);
 
 int xenoprof_arch_counter(XEN_GUEST_HANDLE(void) arg);
 int compat_oprof_arch_counter(XEN_GUEST_HANDLE(void) arg);
+int xenoprof_arch_ibs_counter(XEN_GUEST_HANDLE(void) arg);
 
 struct vcpu;
 struct cpu_user_regs;
 
+/* AMD IBS support */
+u32 ibs_init(void);
+extern u32 ibs_caps;
+
 int xenoprofile_get_mode(struct vcpu *v, struct cpu_user_regs * const regs);
 
 static inline int xenoprof_backtrace_supported(void)
index 183078d8db8c7deeeebf87864001482d53698be1..346d6c514ca306b5175d241537689105204022bc 100644 (file)
 #define XENOPROF_shutdown           13
 #define XENOPROF_get_buffer         14
 #define XENOPROF_set_backtrace      15
-#define XENOPROF_last_op            15
+
+/* AMD IBS support */
+#define XENOPROF_get_ibs_caps       16
+#define XENOPROF_ibs_counter        17
+#define XENOPROF_last_op            17
 
 #define MAX_OPROF_EVENTS    32
 #define MAX_OPROF_DOMAINS   25
@@ -124,6 +128,16 @@ typedef struct xenoprof_passive {
 } xenoprof_passive_t;
 DEFINE_XEN_GUEST_HANDLE(xenoprof_passive_t);
 
+/*
+ * Argument of XENOPROF_ibs_counter. Each field is copied one-to-one
+ * into the hypervisor's IBS configuration of the same field name.
+ */
+struct xenoprof_ibs_counter {
+    uint64_t op_enabled;      /* non-zero: enable IBS op sampling */
+    uint64_t fetch_enabled;   /* non-zero: enable IBS fetch sampling */
+    uint64_t max_cnt_fetch;   /* fetch sampling period */
+    uint64_t max_cnt_op;      /* op sampling period */
+    uint64_t rand_en;         /* non-zero: enable fetch randomization */
+    uint64_t dispatched_ops;  /* non-zero: request the op count control bit */
+};
+typedef struct xenoprof_ibs_counter xenoprof_ibs_counter_t;
+DEFINE_XEN_GUEST_HANDLE(xenoprof_ibs_counter_t);
 
 #endif /* __XEN_PUBLIC_XENOPROF_H__ */